#=====Lecture 9 r code: statistics #===== Plot the standard normal curve x <- seq(-4,4,0.001) plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i", ylim=c(0,0.45), xlab="Value of x", ylab="Density") # create the explanatory plot par(mfrow=c(2,1)) x <- seq(-4,4,0.001) plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i", ylim=c(0,0.45),xlab="Value of x", ylab="Density", xaxt="n") lines(x=x[x<(-1.96)], y=dnorm(x[x<(-1.96)]), type="h", col="#77777777") axis(at=c(-4,-1.96,0,1.96, 4),lab=c("-4","-x", "0","+x", "4"),side=1) plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i", ylim=c(0,0.45),xlab="Value of x", ylab="Density", xaxt="n") lines(x=x[x<1.96], y=dnorm(x[x<1.96]), type="h", col="#77777777") axis(at=c(-4,-1.96,0,1.96, 4),lab=c("-4","-x", "0","+x", "4"),side=1) ## show qnorm quants <- qnorm(c(0.01,0.025,0.05,0.95,0.975,0.99)) round(quants,2) ## show pnorm pnorm(quants) pnorm(-2.33) pnorm(-2.33, lower.tail=F) pnorm(-2.33) + pnorm(-2.33, lower.tail=F) # demonstrate dnorm and rnorm dnorm(quants) rnorm(n=10) #===random numbers ?.Random.seed ?set.seed .Random.seed[1:4] rnorm(1) .Random.seed[1:4] rnorm(1) .Random.seed[1:4] set.seed(100) rnorm(3) rnorm(3) set.seed(100) rnorm(3) rnorm(3) 2^31-1 -2^31 rnorm(5) #=====using the sample function #roll some dice 10 times sample(1:6,size=10,replace=T) #flip a coin 10 times sample(c("H","T"), size=10, replace=T) #select five cards from a deck cards <- paste(rep(c("A",2:10,"J","Q","K"),4), c("Club","Diamond","Heart","Spade")) sort(cards) #check that this worked! sample(cards,5) #times out of 10000 you select 0,1,2,3,4,5 hearts x <- vector(length=10000) for (i in 1:10000) { x[i] <- length(grep("Heart", sample(cards,5))) } table(x) #Note: grep searches for a matching regular expression and return the number of matches. #first two cards both Aces x <- vector(length=10000) for (i in 1:10000) { x[i] <- 2==length(grep("A", sample(cards,2))) } table(x) #4 aces out of 7 cards (Texas hold 'em) x <- vector(length=100000) for (i in 1:100000) { x[i] <- length(grep("A", sample(cards,7)))>=4 } table(x) #=======Hands-on exercise 1 require(plyr) require(TeachingDemos) Fbomb<-rnorm(100, mean=80, sd=10) f<-length(which(Fbomb>100 && Fbomb<60))/length(Fbomb) gib<-pnorm(0.24, mean=0,sd=1, lower.tail=T,log.p=F) gib #=====exploratory data analysis summary(iris) fivenum(iris[,3]) #min, lower quart, mean, upper q, max for petal length sd(iris[,3]) # sd for petal length apply(iris[,1:4], 2, sd, na.rm=T) # sd for each variable, omit spp. apply(iris[,1:4], 2, range, na.rm=T) #min and max for each variable, omit spp. boxplot(iris) pairs(iris[,1:4]) pairs(iris[,1:4], main = "Edgar Anderson's Iris Data", pch = 19, col = rep(c("#FF000055", "#00FF0055", "#0000FF55"), c(50,50,50))) pairs(iris[,1:4], main = "Edgar Anderson's Iris Data", pch = 21, bg = rep(c("red", "green3", "blue"), table(iris$Species))) #====histogram hist(iris$Petal.Length, main="", col="gray", xlab="Petal length (cm)") table(iris$Species) table(iris$Petal.Length) par(mfrow=c(1,3)) hist(iris[iris$Species=="setosa",]$Petal.Length, ylim=c(0,50), breaks=seq(1,7,0.5), main="setosa", col="gray", xlab="Petal length (cm)") hist(iris[iris$Species=="versicolor",]$Petal.Length, ylim=c(0,50), breaks=seq(1,7,0.5), main="versicolor", col="gray", xlab="Petal length (cm)") hist(iris[iris$Species=="virginica",]$Petal.Length, ylim=c(0,50), breaks=seq(1,7,0.5), main="virginica", col="gray", xlab="Petal length (cm)") #==adding density curves par(mfrow=c(1,1)) hist(iris$Petal.Length, main="", col="gray", xlab="Petal length (cm)", freq=F) lines(density(iris$Petal.Length), lwd=2, col="red") lines(density(iris$Petal.Length,adjust=0.4), lwd=2, col="blue") lines(density(iris$Petal.Length,adjust=2), lwd=2, col="green3") #=====using t-test methodA <- c(79.982, 80.041, 80.018, 80.041, 80.03, 80.029, 80.038, 79.968, 80.049, 80.029, 80.019, 80.002, 80.022) methodB <- c(80.02, 79.939, 79.98, 79.971, 79.97, 80.029, 79.952, 79.968) #briefly examine assumptions boxplot(methodA, methodB, names = c("A", "B")) par(mfrow = c(1,2)) qqnorm(methodA) qqline(methodA) qqnorm(methodB); qqline(methodB) t.test(methodA, methodB) resultsAB <- t.test(methodA, methodB) names(resultsAB) resultsAB$p.value var(methodA) var(methodB) #test of equal variances var.test(methodA, methodB) #use equal variances t.test(methodA, methodB, var.equal = TRUE) #Mann-Whitney test wilcox.test(methodA, methodB) #======Hands-on Exercise 2 farb<-rnorm(30, mean=0, sd=1) farg<-rexp(30, rate=1) farp<-rt(30,df=Inf) farn<-rcauchy(30, location=0,scale=1) qqnorm(farb) qqline(farb) qqnorm(farg) qqline(farg) qqnorm(farp) qqline(farp) qqnorm(farn) qqline(farn)